## vector with package names
x <- c("pbapply", "viridis", "knitr", "kableExtra", "ggplot2", "ape", "rwty")
aa <- lapply(x, function(y) {
# check if installed, if not then install
if (!y %in% installed.packages()[,"Package"])
install.packages(y)
# load package
try(require(y, character.only = T), silent = T)
})
knitr::opts_knit$set(root.dir = normalizePath(".."))
knitr::opts_chunk$set(dpi = 38, fig.width = 18, fig.height = 10, echo = TRUE)
options(knitr.kable.NA = '')
theme_set(theme_light(base_size = 30))
source('./source/custom_treespace.R')
# read data to order leks by number of years sampled
fs <- read.csv("./data/processed/fossil_series.csv", header = T, sep = ",")
years_by_lek <- aggregate(Sampled ~ Lek, data = fs, FUN = sum)
years_by_lek <- years_by_lek[order(years_by_lek$Sampled, decreasing = TRUE), ]
rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")
rb.trees <- rb.trees[grep("run_", names(rb.trees), invert = TRUE, value = TRUE)]
trees_diags <- data.frame(model = names(rb.trees))
# get lek name
trees_diags$lek <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 1)
# substitution model
trees_diags$subs <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 2)
# period
trees_diags$period <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 3)
# and which fossils were used
trees_diags$fossils <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 4)
trees_diags$align <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 5)
trees_diags$n_trees <- sapply(rb.trees, function(x) length(x$trees))
# Number of models by iterations and lek
kbl <- knitr::kable(as.matrix(table(trees_diags$n_trees, trees_diags$lek)), caption = "Number of models by lek and number of trees")
kableExtra::kable_styling(kbl)
| BR1 | CCE | HC1 | SUR | TR1 | |
|---|---|---|---|---|---|
| 3002 | 18 | 0 | 24 | 6 | 18 |
| 6002 | 0 | 24 | 0 | 24 | 0 |
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
tree_names <- grep("early", names(rb.trees), invert = TRUE, value = TRUE)
pnts_lks <- lapply(sel_leks, function(i) {
print(i)
lek_trees <- grep(i, tree_names, value = TRUE)
multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)
if (!is(pnts, "try-error"))
return(pnts) else
return(NULL)
})
names(pnts_lks) <- sel_leks
sapply(pnts_lks, is.null)
saveRDS(pnts_lks, file = "./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")
pnts_lks <- readRDS("./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")
# fix model names
pnts_lks <- lapply(pnts_lks, function(x) {
# rename alignment models
x$chain <- gsub("all.equal", "MAFFT-agnostic", x$chain)
x$chain <- gsub("optimal", "MAFFT-optimal", x$chain)
x$chain <- gsub("prank", "PRANK-TN93", x$chain)
x$chain <- gsub("new", "recent records", x$chain)
x$chain <- gsub("old", "historical records", x$chain)
x$chain <- gsub("Uexp", "relaxed", x$chain)
x$chain <- gsub("_all", "", x$chain)
x$chain <- gsub("_", "/", x$chain)
return(x)
})
ggs <- lapply(pnts_lks, function(X) {
ggplot(data = X, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
##
## $TR1
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("historical records", X$chain), ]
if (nrow(X) > 0)
ggplot(data = X, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
## NULL
##
## $TR1
## NULL
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("recent records", X$chain), ]
ggplot(data = X, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
##
## $TR1
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("historical records", X$chain), ]
X$clock <- ifelse(grepl("global", X$chain), "global", "relaxed")
X$chain <- gsub("/global|/relaxed", "", X$chain)
if (nrow(X) > 0)
ggplot(data = X, aes(x = x, y = y)) +
geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
geom_point(aes(shape = clock, col = clock), size = 4) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
## NULL
##
## $TR1
## NULL
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("recent records", X$chain), ]
X$clock <- ifelse(grepl("global", X$chain), "global", "relaxed")
X$chain <- gsub("/global|/relaxed", "", X$chain)
ggplot(data = X, aes(x = x, y = y)) +
geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
geom_point(aes(shape = clock, col = clock), size = 4) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
##
## $TR1
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
# sel_leks <- c("SUR", "TR1")
tree_names <- grep("early", names(rb.trees), value = TRUE)
pnts_lks <- lapply(sel_leks, function(i) {
# print(i)
lek_trees <- grep(i, tree_names, value = TRUE)
multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)
if (!is(pnts, "try-error"))
return(pnts) else
return(NULL)
})
names(pnts_lks) <- sel_leks
sapply(pnts_lks, is.null)
saveRDS(pnts_lks, file = "./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")
pnts_lks <- readRDS("./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")
# fix model names
pnts_lks <- lapply(pnts_lks, function(x) {
# rename alignment models
x$chain <- gsub("all.equal", "MAFFT-agnostic", x$chain)
x$chain <- gsub("optimal", "MAFFT-optimal", x$chain)
x$chain <- gsub("prank", "PRANK-TN93", x$chain)
x$chain <- gsub("new", "recent records", x$chain)
x$chain <- gsub("old", "historical records", x$chain)
x$chain <- gsub("Uexp", "relaxed", x$chain)
x$chain <- gsub("_all", "", x$chain)
# x$chain <- gsub("_", "/", x$chain)
return(x)
})
ggs <- lapply(pnts_lks, function(X) {
ggplot(data = X, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
##
## $TR1
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("recent records", X$chain), ]
ggplot(data = X, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
##
## $TR1
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("historical records", X$chain), ]
if (nrow(X) > 0)
ggplot(data = X, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
## NULL
##
## $TR1
## NULL
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("historical records", X$chain), ]
X$clock <- ifelse(grepl("global", X$chain), "Global", "relaxex")
X$chain <- gsub("/global|/relaxed|/Uexp", "", X$chain)
if (nrow(X) > 0)
ggplot(data = X, aes(x = x, y = y)) +
geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
geom_point(aes(shape = clock, col = clock), size = 4) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
})
ggs
## $SUR
##
## $CCE
##
## $HC1
##
## $BR1
## NULL
##
## $TR1
## NULL
ggs <- lapply(pnts_lks, function(X) {
X <- X[grep("recent records", X$chain), ]
if (X)
X$clock <- ifelse(grepl("global", X$chain), "Global", "relaxed")
X$chain <- gsub("/global|/relaxed|/Uexp", "", X$chain)
ggplot(data = X, aes(x = x, y = y)) +
geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
geom_point(aes(shape = clock, col = clock), size = 4) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_wrap(~chain, nrow = 4) +
scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
})
ggs
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
# relax molecular clock
tree_names <- grep("global", names(rb.trees), value = TRUE)
# all data for leks with historical data
# una figura por lek, comparando los tres alineamientos y usando el reloj molecular relajado y "all" fossils (o early si es que hay un problema sacando árboles con puntas compartidas para "all"). Para los leks BR1 y TR1 esto serÃa obviamente sólo con los fósiles recientes. Para SUR, CCE y HC1 mejor incluyendo a los old gapped.
tree_names <- tree_names[grepl("BR1|TR1", tree_names) & grepl("_all_", tree_names) | grepl("SUR|CCE|HC1", tree_names) & !grepl("early|new|no.fossils", tree_names)]
pnts_lks <- lapply(sel_leks, function(i) {
print(i)
lek_trees <- grep(i, tree_names, value = TRUE)
multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)
if (!is(pnts, "try-error"))
return(pnts) else
return(NULL)
})
names(pnts_lks) <- sel_leks
sapply(pnts_lks, is.null)
# put all in a single data frame
pnts_lks_df <- do.call(rbind, pnts_lks)
saveRDS(pnts_lks_df, file = "./output/topology/tree_distance_all_data_by_lek_and_alignment.RDS")
# read data
pnts_lks_df <- readRDS("./output/topology/tree_distance_all_data_by_lek_and_alignment.RDS")
# rename alignment models
pnts_lks_df$chain[grep("all.equal", pnts_lks_df$chain)] <- "MAFFT-agnostic"
pnts_lks_df$chain[grep("optimal", pnts_lks_df$chain)] <- "MAFFT-optimal"
pnts_lks_df$chain[grep("prank", pnts_lks_df$chain)] <- "PRANK-TN93"
ggplot(data = pnts_lks_df, aes(x = x, y = y, fill = generation)) +
geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
scale_colour_gradient(low = "red", high = "yellow") +
geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
facet_grid(scales = "free", cols = vars(chain), rows = vars(lek)) +
scale_fill_gradientn(colours = viridis(256)) +
labs(x = "MDS_v1", y = "MDS_v2") +
ggtitle(label = sprintf("Tree space for %d trees", nrow(pnts_lks_df) / length(unique(pnts_lks_df$chain)))) +
theme_light(base_size = 30)
R session information
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rwty_1.0.2 ape_5.5 ggplot2_3.3.3 kableExtra_1.3.4
## [5] knitr_1.33 viridis_0.6.1 viridisLite_0.4.0 pbapply_1.4-3
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.6 svglite_2.0.0 lattice_0.20-41 digest_0.6.27
## [5] utf8_1.2.1 R6_2.5.0 plyr_1.8.6 evaluate_0.14
## [9] coda_0.19-4 httr_1.4.2 highr_0.9 pillar_1.6.0
## [13] rlang_0.4.11 rstudioapi_0.13 jquerylib_0.1.4 phangorn_2.7.0
## [17] Matrix_1.3-2 rmarkdown_2.8 labeling_0.4.2 webshot_0.5.2
## [21] stringr_1.4.0 igraph_1.2.6 munsell_0.5.0 compiler_4.0.5
## [25] xfun_0.22 pkgconfig_2.0.3 systemfonts_1.0.2 htmltools_0.5.1.1
## [29] tidyselect_1.1.1 tibble_3.1.1 gridExtra_2.3 quadprog_1.5-8
## [33] codetools_0.2-18 reshape_0.8.8 fansi_0.4.2 crayon_1.4.1
## [37] dplyr_1.0.6 withr_2.4.2 MASS_7.3-53.1 grid_4.0.5
## [41] nlme_3.1-152 jsonlite_1.7.2 GGally_2.1.1 gtable_0.3.0
## [45] lifecycle_1.0.0 magrittr_2.0.1 scales_1.1.1 stringi_1.6.1
## [49] farver_2.1.0 reshape2_1.4.4 xml2_1.3.2 bslib_0.2.4
## [53] ellipsis_0.3.2 ggdendro_0.1.22 generics_0.1.0 vctrs_0.3.8
## [57] fastmatch_1.1-0 RColorBrewer_1.1-2 tools_4.0.5 glue_1.4.2
## [61] purrr_0.3.4 parallel_4.0.5 yaml_2.2.1 colorspace_2.0-1
## [65] rvest_1.0.0 sass_0.3.1